pacman::p_load(tidyverse, DESeq2, ggVennDiagram, UpSetR, plotly, ggrepel, scico, apeglm)
# DEG object
dds1 <- readRDS("./results/deseq2_regions_dds.rds")
# The list of DEG results
res<-readRDS("./results/deseq2_regions_results_local.rds")
summary(res)
## Length Class Mode
## central 6 DESeqResults S4
## south 6 DESeqResults S4
This annotation file contains all P. cultripes transcripts by rows. As columns, we can find: * The gene IDs for P. cultripes, followed by the transcripts IDs and the peptides IDs (gene_id, transcript_id, peptide_id) * The IDs and descriptions of X. tropicalis annotated proteome resulting from both nucleotide and peptide blasting against P. cultripes transcripts (xenx_pep_id, xenx_gene_symbol, xenx_description, xenp_pep_id, xenp_gene_symbol, xenp_description).
# The annotation file
xtrop<-read.csv("./xtr109/diamondblast109.csv", stringsAsFactors = FALSE)
Extract DEGs lists in order to visualize overlapping regulated genes in the comparison of interest.
# List of DEGs
# Creating a function to extract the lists of genes repeatedly: all DEGs, up-regulated DEGs and down-regulated DEGs.
extract_degs<-function(x) {
return(
x %>%
as_tibble(rownames = "gene") %>%
filter(padj<0.05) %>%
pull(gene)
)
}
extract_up<-function(x) {
return(
x %>%
as_tibble(rownames = "gene") %>%
filter(padj<0.05) %>%
filter(log2FoldChange>0) %>%
pull(gene)
)
}
extract_down<-function(x) {
return(
x %>%
as_tibble(rownames = "gene") %>%
filter(padj<0.05) %>%
filter(log2FoldChange<0) %>%
pull(gene)
)
}
# Extracting all differentially expressed genes from all the comparisons stored in the list of DESeqResults.
sig_degs<-lapply(res, FUN=extract_degs)
str(sig_degs)
## List of 2
## $ central: chr [1:93] "PECUL23A000127" "PECUL23A000194" "PECUL23A000652" "PECUL23A001362" ...
## $ south : chr [1:983] "PECUL23A000011" "PECUL23A000059" "PECUL23A000253" "PECUL23A000300" ...
up_degs<-lapply(res, FUN=extract_up)
down_degs<-lapply(res, FUN=extract_down)
library(ggvenn)
## Loading required package: grid
sig_degs_set <- list(Central=sig_degs$central,
South=sig_degs$south)
venn_palette <- c("dodgerblue","darkblue")
ggvenn(sig_degs_set, columns = c("Central","South"),stroke_size = 0.3,
fill_color = venn_palette, stroke_color="black", show_percentage=F,
fill_alpha=0.6, set_name_color = "black", text_size=6)
# Plot Upset
upset(fromList(sig_degs),
nsets = length(sig_degs),
keep.order = T,
nintersects = 100,
number.angles = 0, point.size = 3, line.size = 1,
sets.x.label = "Number of DEGs",
set_size.show = TRUE,
set_size.scale_max = max(sapply(sig_degs, length))+200,
text.scale = c(1.2, 1.2, 1.2, 1.2, 1.5, 1.5),
sets.bar.color = c("dodgerblue","darkblue"),
order.by=c("degree","freq"))
Showing the log fold change plotted against the -log10() transformed adjusted p-values per gene.
genes_to_label <- c("mcl1", "mmp9.1")
central_data <- res$central %>%
as_tibble(rownames = "gene_id") %>%
drop_na(padj) %>% # drop all genes with NAs
filter(padj < 0.5) %>% # reduce the number of points that need to be plotted
mutate(
category = case_when(
padj < 0.05 & log2FoldChange > 0 ~ "Upregulated",
padj < 0.05 & log2FoldChange < 0 ~ "Downregulated",
TRUE ~ "Non-significant"
)
) %>%
left_join(xtrop, by = c("gene_id")) # add annotations
# Calculate the number of upregulated and downregulated genes
n_upregulated <- sum(central_data$category == "Upregulated")
n_downregulated <- sum(central_data$category == "Downregulated")
# Create the plot
gg_central <- central_data %>%
ggplot(aes(
x = log2FoldChange, y = -log10(padj), color = category,
text = paste0("</br>Pcu23 gene: ", gene_id,
"</br>X.tr peptide: ", xenp_gene_symbol,
"</br>X.tr description: ", xenp_description)
)) +
geom_point(alpha = 0.75, shape = 16) +
scale_color_manual(values = c(
"Non-significant" = "#808080",
"Upregulated" = "#FF5733",
"Downregulated" = "#0F8CBA"
)) +
geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "#555555") +
geom_vline(xintercept = 0, linetype = "dashed", color = "#555555") +
annotate("text", x = 5, y = max(-log10(central_data$padj), na.rm = TRUE) - 0.5,
label = paste0("Upregulated: ", n_upregulated), color = "#FF5733", hjust = 1) +
annotate("text", x = -5, y = max(-log10(central_data$padj), na.rm = TRUE) - 0.5,
label = paste0("Downregulated: ", n_downregulated), color = "#0F8CBA", hjust = 0) +
scale_x_continuous(breaks = seq(-6, 6, by = 2), limits = c(-6, 6)) +
theme_minimal() +
theme(
legend.position = "none",
) +
geom_text_repel(
data = subset(central_data, xenp_gene_symbol %in% genes_to_label),
aes(label = xenp_gene_symbol),
size = 3,
box.padding = unit(1.7, "lines"),
point.padding = unit(0.1, "lines"),
color = "black",
segment.color = 'black'
)
gg_central
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
ggsave("central_volcanoplot.jpeg", width = 7, height = 5 , dpi = 600)
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
genes_to_label <- c("mmp11.1", "ass1", "sec14l3", "stard8", "klf9", "hsp90aa1.1")
extra_gene <- "PECUL23A051069T1"
extra_gene_2 <- "leprotl1"
south_data <- res$south %>%
as_tibble(rownames = "gene_id") %>%
drop_na(padj) %>% # drop all genes with NAs
filter(padj < 0.5) %>% # reduce the number of points that need to be plotted
mutate(
category = case_when(
padj < 0.05 & log2FoldChange > 0 ~ "Upregulated",
padj < 0.05 & log2FoldChange < 0 ~ "Downregulated",
TRUE ~ "Non-significant"
)
) %>%
left_join(xtrop, by = c("gene_id")) # add annotations
# Calculate the number of upregulated and downregulated genes
n_upregulated <- sum(south_data$category == "Upregulated")
n_downregulated <- sum(south_data$category == "Downregulated")
# Create the plot
gg_south <- south_data %>%
ggplot(aes(
x = log2FoldChange, y = -log10(padj), color = category,
text = paste0("</br>Pcu23 gene: ", gene_id,
"</br>X.tr peptide: ", xenp_pep_id,
"</br>X.tr description: ", xenp_description)
)) +
geom_point(alpha = 0.75, shape = 16) +
scale_color_manual(values = c(
"Non-significant" = "#808080",
"Upregulated" = "#FF5733",
"Downregulated" = "#0F8CBA"
)) +
geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "#555555") +
geom_vline(xintercept = 0, linetype = "dashed", color = "#555555") +
annotate("text", x = 5, y = max(-log10(south_data$padj), na.rm = TRUE) - 0.5,
label = paste0("Upregulated: ", n_upregulated), color = "#FF5733", hjust = 1) +
annotate("text", x = -5, y = max(-log10(south_data$padj), na.rm = TRUE) - 0.5,
label = paste0("Downregulated: ", n_downregulated), color = "#0F8CBA", hjust = 0) +
scale_x_continuous(breaks = seq(-6, 6, by = 2), limits = c(-6, 6)) +
theme_minimal() +
theme(
legend.position = "none",
) +
geom_text_repel(
data = subset(south_data, xenp_gene_symbol %in% genes_to_label),
aes(label = xenp_gene_symbol),
size = 3,
box.padding = unit(3, "lines"),
point.padding = unit(0, "lines"),
color = "black",
segment.color = "black",
max.overlaps = Inf
) +
geom_text_repel(
data = subset(south_data, transcript_id %in% extra_gene),
aes(label = xenp_gene_symbol),
size = 3,
box.padding = unit(2.7, "lines"),
point.padding = unit(0, "lines"),
color = "black",
segment.color = "black",
max.overlaps = Inf
) +
geom_text_repel(
data = subset(south_data, xenp_gene_symbol %in% extra_gene_2),
aes(label = xenp_gene_symbol),
size = 3,
box.padding = unit(3, "lines"),
point.padding = unit(0, "lines"),
color = "black",
segment.color = "black",
max.overlaps = Inf
)
gg_south
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
ggsave("south_volcanoplot.jpeg", width = 7, height = 5 , dpi = 600)
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
We filter in the DESeq2 results object those DEGs which are significantly DE.
pull_genes<-function(x, alpha=0.05, lfc=0, signed="both", feature="xenp_gene_symbol") {
# filter by adjusted p
x<-x %>%
filter(padj<alpha)
if(signed=="up"){
x<-x %>%
filter(log2FoldChange>lfc) %>%
pull(feature)
}
if(signed=="down"){
x<-x %>%
filter(log2FoldChange<(lfc*-1)) %>%
pull(feature)
}
if(signed=="both"){
x<-x %>%
filter(abs(log2FoldChange)>lfc) %>%
pull(feature)
}
## deal with multiple gene annotations for the same gene (different transcripts)
x<-strsplit(x, ";") %>% unlist() %>% unique()
x<-x[!is.na(x)]
return(x[!is.na(x)])
}
# now extract all
degs_ids<-lapply(res, FUN=function(x)
x %>%
as_tibble(rownames = "gene_id") %>%
left_join(xtrop) %>%
pull_genes(feature = "gene_id", alpha = 0.05, lfc=0, signed = "both")
)
## Joining with `by = join_by(gene_id)`
## Joining with `by = join_by(gene_id)`
str(degs_ids)
## List of 2
## $ central: chr [1:93] "PECUL23A000127" "PECUL23A000194" "PECUL23A000652" "PECUL23A001362" ...
## $ south : chr [1:983] "PECUL23A000011" "PECUL23A000059" "PECUL23A000253" "PECUL23A000300" ...
After that, we join the annotations to those DEGs and arrange them by their lowest adjusted p-value to get more infromation on those DEGs.
extract_info <- function(sig_genes, deseq_result, xtrop) {
# Convert DESeq results to tibble
deseq_tibble <- as_tibble(deseq_result, rownames = "gene_id")
# Filter DESeq results for significant DEGs based on gene_id
filtered_deseq_result <- deseq_tibble %>%
filter(gene_id %in% sig_genes)
# Perform left join with annotation data
detailed_info <- filtered_deseq_result %>%
left_join(xtrop, by = "gene_id")
return(detailed_info)
}
sig_degs_central <- extract_info(degs_ids$central, res$central, xtrop) %>%
arrange(padj)
sig_degs_south <- extract_info(degs_ids$south, res$south, xtrop) %>%
arrange(padj)
intersection <- extract_info(intersect(sig_degs_set$Central, sig_degs_set$South), res$south, xtrop)
write.csv(sig_degs_central, file = "sig_degs_central.csv")
write.csv(sig_degs_south, file = "sig_degs_south.csv")
extract_info_by_keywords(res$central, c("mmp11", "apoptosis"), xtrop)
## # A tibble: 31 × 15
## gene_id baseMean log2FoldChange lfcSE stat pvalue padj transcript_id
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 PECUL23A… 2303. 0.577 0.127 4.52 6.09e-6 0.00374 PECUL23A0615…
## 2 PECUL23A… 96.6 1.14 0.384 2.96 3.04e-3 0.204 PECUL23A0095…
## 3 PECUL23A… 212. 0.220 0.113 1.95 5.14e-2 0.652 PECUL23A0382…
## 4 PECUL23A… 367. -0.123 0.0678 -1.82 6.92e-2 0.702 PECUL23A0177…
## 5 PECUL23A… 43.5 1.82 1.25 1.45 1.46e-1 0.796 PECUL23A0547…
## 6 PECUL23A… 1275. -0.158 0.122 -1.29 1.97e-1 0.831 PECUL23A0412…
## 7 PECUL23A… 69.5 0.204 0.159 1.28 2.00e-1 0.835 PECUL23A0180…
## 8 PECUL23A… 745. 0.250 0.221 1.13 2.58e-1 0.866 PECUL23A0234…
## 9 PECUL23A… 206. -0.155 0.148 -1.05 2.93e-1 0.887 PECUL23A0303…
## 10 PECUL23A… 725. -0.0759 0.0818 -0.928 3.53e-1 0.914 PECUL23A0577…
## # ℹ 21 more rows
## # ℹ 7 more variables: peptide_id <chr>, xenx_pep_id <chr>,
## # xenx_gene_symbol <chr>, xenx_description <chr>, xenp_pep_id <chr>,
## # xenp_gene_symbol <chr>, xenp_description <chr>
extract_info_by_keywords(res$south, c("mmp11", "apoptosis"), xtrop)
## # A tibble: 31 × 15
## gene_id baseMean log2FoldChange lfcSE stat pvalue padj transcript_id
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 PECUL23A0… 43.5 5.03 1.19 4.22 2.49e-5 0.00235 PECUL23A0547…
## 2 PECUL23A0… 160. 0.520 0.163 3.19 1.42e-3 0.0290 PECUL23A0442…
## 3 PECUL23A0… 104. -0.283 0.125 -2.27 2.33e-2 0.150 PECUL23A0044…
## 4 PECUL23A0… 206. -0.315 0.148 -2.13 3.33e-2 0.185 PECUL23A0303…
## 5 PECUL23A0… 69.5 0.304 0.158 1.93 5.35e-2 0.239 PECUL23A0180…
## 6 PECUL23A0… 1273. -0.159 0.0846 -1.88 6.08e-2 0.258 PECUL23A0303…
## 7 PECUL23A0… 202. -0.231 0.142 -1.63 1.03e-1 0.345 PECUL23A0527…
## 8 PECUL23A0… 2303. 0.202 0.127 1.59 1.12e-1 0.360 PECUL23A0615…
## 9 PECUL23A0… 98.0 -0.256 0.189 -1.35 1.76e-1 0.450 PECUL23A0342…
## 10 PECUL23A0… 220. 0.103 0.0781 1.31 1.89e-1 0.469 PECUL23A0009…
## # ℹ 21 more rows
## # ℹ 7 more variables: peptide_id <chr>, xenx_pep_id <chr>,
## # xenx_gene_symbol <chr>, xenx_description <chr>, xenp_pep_id <chr>,
## # xenp_gene_symbol <chr>, xenp_description <chr>
# we can now turn this into an interactive plot:
ggplotly(gg_central, tooltip="text")
## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomTextRepel() has yet to be implemented in plotly.
## If you'd like to see this geom implemented,
## Please open an issue with your example code at
## https://github.com/ropensci/plotly/issues
ggplotly(gg_south, tooltip="text")
## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomTextRepel() has yet to be implemented in plotly.
## If you'd like to see this geom implemented,
## Please open an issue with your example code at
## https://github.com/ropensci/plotly/issues
## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomTextRepel() has yet to be implemented in plotly.
## If you'd like to see this geom implemented,
## Please open an issue with your example code at
## https://github.com/ropensci/plotly/issues
## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomTextRepel() has yet to be implemented in plotly.
## If you'd like to see this geom implemented,
## Please open an issue with your example code at
## https://github.com/ropensci/plotly/issues